

*** COMPILATION OF THE DATASET

* In this file, we clean the production data, add various additional datasets to this dataset, and generate the production function variables.

***************************
set more off
cd "$station"
ssc install mdesc

** 0. CREATE DATASET OF CONTEXTUAL INDICES
******************************************


import excel ".\data\indices.xlsx", clear firstrow

** Organize variables **********************************************************

drop if year > 1913

qui sum scholliers_mining if year==1910
local base = r(mean)
qui sum gadmin_avgwage if year==1910
local base2 = r(mean)

gen nomwage_mine = (scholliers_mining/`base')*100	//Use data from Scholliers
gen nomwage_mine2 = (gadmin_avgwage/`base2')*100	//Gadisseur data nearly identical


//Wages
gen realwage_industry = segers_nomwage_industry *(cpi_segers/100)
gen realwage_mine = nomwage_mine *(cpi_segers/100)
label variable realwage_industry "Manufacturing (inc. coal mining)"
label variable realwage_mine "Coal mining"

//Capital
label variable gadmin_machinesN "Number of machines"
label variable gadmin_machinesHP "Total horsepower"

//Production
gen productivity = (gadmin_production/gadmin_workersN)*1000
label variable gadmin_production "Production"
label variable productivity "Productivity (per worker)"

//Employment
gen share_male_sec = (buyst_male_mining/buyst_male_secondary)*100
gen share_male_tot = (buyst_male_mining/buyst_male_total)*100
label variable share_male_sec "Male workers - Manufacturing employment"
label variable share_male_tot "Male workers - Total employment"
gen share_total_sec = (buyst_total_mining/buyst_total_secondary)*100
gen share_total_tot = (buyst_total_mining/buyst_total_total)*100
label variable share_total_sec "All workers - Manufacturing employment"
label variable share_total_tot "All workers - Total employment"

gen share2_total_sec = (gadmin_workersN/buyst_total_secondary)*100
gen share2_total_tot = (gadmin_workersN/buyst_total_total)*100
label variable share2_total_sec "Share of manufacturing employment"
label variable share2_total_tot "Share of total employment"


//Prices
gen realprice = wibail_price *(cpi_segers/100)
gen realimportprice = (degreve_imp_pq/degreve_imp_q)*(cpi_segers/100)*1000
gen realexportprice = (degreve_exp_pq/degreve_exp_q)*(cpi_segers/100)*1000
label variable realprice "Domestic price"
label variable realexportprice "Export price"



** Save data********* **********************************************************

save ./data/temp/indices, replace



** I. ADMINISTRATION DATA 
******************************************


** I.2 FROM DATA SHEETS TO STATA
******************************************

cd "./data/production"
** Bring to data sheets in its various layouts to Stata format
run "DO_1_Excel_to_Stata_101-107_179-199.do"
run "DO_1_Excel_to_Stata_108-113_200-204.do"
run "DO_1_Excel_to_Stata_114-118.do"
run "DO_1_Excel_to_Stata_118-124.do"

** Bring the various layouts to one common dataset
run "DO_2_Merge_data.do"

** This file was used for further controls of the manual data entry and harmonization
** This file saves the raw dataset "data_adm.dta"
run "DO_3_Clean_data.do"

cd "$station"

** I.2 ADMINISTRATION DATA: FURTHER DATA CLEANING 
******************************************

use ./data/production/data_adm, clear		// raw data set: annual reports by the Administration des Mines 

set more off


** Renaming and creating variables
 
rename (q_total_t l_surface_total_n l_under_total_n l_surface_total_w  l_under_total_w)(q   sd ud uwsd uwud )	// output, number of surface and underground workers, daily wage per worker
rename year yr
 
* Daily wages after 1899

replace uwsd = l_surface_total_w_net   if uwsd==.	
replace uwud = l_under_total_w_net if uwud==.

* Impute days worked from the data:

gen wsd = uwsd*sd	// daily labor cost for surface workers  = daily surface wage * average number of surface workers
gen wud = uwud*ud	// same for underground workers

gen ndays = expenses_l / (wsd+wud)		// number of days worked = total labor cost / daily labor cost
sum ndays avgdays_mine					// we observe days worked for a subset of years. Closely resembles our imputed measure closely

replace ndays = avgdays_mine if avgdays_mine~=.

* Labor

gen s = sd*ndays		// number of days worked by surface workers
gen u = ud*ndays		// number of days worked by underground workers
gen emp = u+s 		// total number of worker-days

gen ws = uwsd*s			// labor cost of surface workers = daily wage * days worked
gen wu = uwud*u			// same for underground workers

* Revenue and prices

gen rev = q_total_pq
gen p = rev/q
bys yr: sum rev p

rename id mineid		// unique mine id
bys mineid: egen avdist = mean(district)	// district codes not used because time-variant
sum avdist

replace balance_gain = 0 if balance_gain==.
replace balance_loss = 0 if balance_loss==.

* Profits

gen prof = balance_gain-balance_loss
sum prof , d

bys yr: egen avprof = mean(prof)
bys yr: egen agprof = sum(prof)
twoway connect avprof yr
twoway connect agprof yr if agprof <10000000
 
* Horses 
 
replace horses_under = 0 if horses_under==.
replace horses_surf = 0 if horses_surf==.
gen h = horses_under + horses_surf					// total number of horses used
replace h = . if yr>=1900							// variable not observed after 1900
gen lh = log(h)
 
** Expenses

bys yr: egen lc = sum(expenses_l)
foreach var of varlist expenses* {
replace `var' = 0 if  `var'==.
}
 
gen wl = expenses_l
 
* Investment

gen dif = expenses_other - expenses_prep if yr<1872	 // checking whether expenses_prep a part of expenses_other (before 1873)
sum dif, d	// they are not, because cannot have negative expenses...
drop dif
 
gen winv = expenses_spc_total						// between 1873 and 1899, investment = 'expenses extraordinaires'
replace winv = expenses_spc_prep  if yr>=1900		// after 1900, investment = 'expenses preparatoires'
replace winv = expenses_prep if yr<=1872			// before 1872, investment = 'expenses preparatoires'
bys yr: sum winv

replace winv = . if yr==1913	// no investment data for 1913
bys yr: egen avwinv = mean(winv)
twoway connect avwinv yr, xline(1872 1900)	// seems correct

* Intermediate inputs expenditure

bys yr: sum expenses_other
gen wm = expenses_other if yr<=1872 | yr>=1900			// up to 1872 and after 1900, material expenditure = 'autre expenses'
replace wm = expenses_std_other if yr>=1873 & yr<1900	// in between 1873 and 1900, material expenditure = 'autres expenses ordinaires' 
replace wm = 0 if wm==.													

bys yr: egen avwm = mean(wm)
twoway connect avwm yr, xline(1872 1900)		// visual check.
  
gen tc = wm+winv+wl								// total costs

sum tc expenses_total , d 						// comparing our total cost measure and the one reported in the data
corr tc expenses_total 							// our total cost measure lines up with the reported total costs in the raw data.
corr tc expenses_total if yr>=1873 & yr<1900	 
corr tc expenses_total if yr<1873 | yr>=1900	 
 
gen lwm = log(wm)

* cost shares
gen cm = wm/(wl+wm+winv)
gen cl = wl/(wl+wm+winv)
gen ck = winv/(wl+wm+winv)
foreach var of varlist cm cl ck {
bys yr: egen av`var' = mean(`var')
bys yr: egen med`var' = median(`var')
}

twoway connect avcm yr , xline(1872 1900)
twoway connect avcl yr , xline(1868 1900)
twoway connect avck yr , xline(1872 1900)
 
** Drop duplicates

sum mineid 
 
duplicates tag mine_id yr , gen(duptag)

sort mine_id yr duptag 
brow duptag mine_id yr 

sort mine_id yr mine_community arrondissement
bys mine_id yr: gen dup = _n
drop if dup>1
drop dup duptag 

xtset mineid yr 
 
** Deflating all monetary variables

gen year = yr
merge m:1 year using ./data/temp/indices
gen cpi = cpi_segers/100
twoway connect cpi yr
drop if _merge==2
drop _merge year
xtset mineid yr
replace cpi = F.cpi*87/89 if yr==1845 		// follows CPI from scholliers 1995 for 1845
 
foreach var of varlist rev prof winv wm ws wu expenses_l wl  uw* p   tc {
replace `var' = `var'*cpi
}
  
 
** Technology

gen kcut = k_excavation_hp
gen kloc = k_extraction_hp
gen kven = k_ventilation_hp
gen kot = k_other_hp
 
replace kloc = 0 if k_extraction_hp==.
replace kcut = 0 if k_excavation_hp==.
replace kven = 0 if k_ventilation_hp==.
replace kot = 0 if k_other_hp==.

foreach var of varlist kloc kcut kven kot {
replace `var' = . if yr>1899		// none of the machine indicators observed after 1899
}
foreach var of varlist   kven kot {
replace `var' = . if yr<1873		// ventilation and 'other machines' observed from 1873-1899
}
 
save ./Data/temp/data_adm_clean, replace		// cleaned dataset of 'Administration' annual reports


 
** II. UNION MEMBERSHIP DATA
******************************************


// Prepare the union membership data

**
qui{
//Load
foreach year of numlist 1869/1907 {
capture import excel "./data/cartel/Membership Union.xlsx", sheet("`year'") firstrow clear 
gen year = `year'
capture tostring date, replace
capture rename employment* employment
tempfile union`year'
save `union`year''
}

//Merge
use `union1869'
foreach year of numlist 1871/1907 {
append using `union`year''
}
}

gen mine_id_main = ""

   //Angleur (Société des mines métalliques)
replace mine_id_main = "ABH" if company == "Abhooz"   //Abhooz
replace mine_id_main = "ABH-BHA" if company == "Abhooz et Bonne Foi Hareng"   //Abhooz et Bonne Foi Hareng
replace mine_id_main = "ABV" if company == "Avroy-Boverie"   //Avroy-Boverie
replace mine_id_main = "ANG" if company == "Angleur" & company2 == "Charbonnage" //Angleur
replace mine_id_main = "ART-XHO-BLL" if company == "Artiste-Xhorré et Baldaz Lalore"   //Artiste-Xhorré et Baldaz Lalore
replace mine_id_main = "BAT" if company == "Batterie et Bonne-Espérance"   //Batterie et Bonne-Espérance
replace mine_id_main = "BVS" if company == "Belle-Vue" | company == "Bellevue"  //Belle-Vue
replace mine_id_main = "BVB" if company == "Bellevue et Bienvenue"  //Bellevue et Bienvenue
   //Bleyberg-ès-Montzen
replace mine_id_main = "BQT-GOR" if company == "Biqcuet-Gorée"   //Biqcuet-Gorée
replace mine_id_main = "SCL" if company == "Bois-d'Avroy"   //Bois-d'Avroy
replace mine_id_main = "BFN" if company == "Bonne-Fin" | company == "Bonnefin"   //Bonne-Fin
replace mine_id_main = "BHA" if company == "Bonnefoi-Hareng"   //Bonnefoi-Hareng
replace mine_id_main = "BFR" if company == "Bonne-Fortune"   //Bonne-Fortune
replace mine_id_main = "BNR" if company == "Bonnier"   //Bonnier
replace mine_id_main = "CHT-VIO" if company == "Chartreuse et Violette"   //Chartreuse et Violette
replace mine_id_main = "CRT" if company == "Cheratte, Housse et Bouhouille"   //Cheratte, Housse et Bouhouille
replace mine_id_main = "COC" if company == "Société John-Cockerill" | company == "John-Cockerill"  | company == "John Cockerill"  //Société John-Cockerill
replace mine_id_main = "CNC" if company == "Concorde"   //Concorde
replace mine_id_main = "CWT" if company == "Cowette-Rufin"   //Cowette-Rufin
   //Chant d'Oiseaux
replace mine_id_main = "CHY" if company == "Crahay-Maireux"   //Crahay-Maireux
   //Conduites d'eau
   //Couthuin (Société des maîtres de forge)
   //Disière et Delcourt
   //G. Dumont, frères
replace mine_id_main = "ESS" if company == "l'Espérance" | company == "Espérance"  //l'Espérance (Seraing)
replace mine_id_main = "BFR-ESM" if company == "Espérance et Bonne-Fortune"   //Espérance et Bonne-Fortune
replace mine_id_main = "TSR-HLX-HMV" if company == "Est de Liège"   //Est de Liège
replace mine_id_main = "FPQ" if company == "Canal de Fond-Piquette"   //Fond-Piquette
replace mine_id_main = "GSL" if company == "Gosson-Lagasse"   //Gosson-Lagasse
replace mine_id_main = "GBC" if company == "Grande-Bacnure"   //Grande-Bacnure
replace mine_id_main = "GMK" if company == "Grands-Makets"   //Grands-Makets
   //Grivegnée
replace mine_id_main = "FLN" if company == "Prés de Fléron"   //Prés de Fléron
replace mine_id_main = "LHY" if company == "Haye" | company == "La Haye"   //Haye
replace mine_id_main = "HSD" if company == "Hasard"   //Hasard
replace mine_id_main = "HPX" if company == "Herman-Pixherotte"   //Herman-Pixherotte
replace mine_id_main = "HRV-WGF" if company == "Herve et Wergifosse"   //Herve et Wergifosse
replace mine_id_main = "HMV" if company == "Homvent-Maldaccord"   //Homvent-Maldaccord
replace mine_id_main = "HLZ" if company == "Horloz"   //Horloz
   //Jowa et Compagnie
   //Jemeppe (Société des laminoirs)
   //Jupille (Société des laminoirs)
replace mine_id_main = "KSL" if company == "Kessales et Bon-Buveur"   //Kessales et Bon-Buveur
replace mine_id_main = "ANS" if company == "Levant" | company == "Ans"  //Levant
replace mine_id_main = "LNT" if company == "Lonette"   //Lonette
   //Marcellis (Société des laminoirs)
replace mine_id_main = "MRH" if company == "Marihaye"   //Marihaye
replace mine_id_main = "CHY" if company == "Maireux"   //Maireux ("En 1835, la fosse est exploitée par la famille Crahay, qui crée une société civile du Maireux-Bas-Bois.")
   //Membach
replace mine_id_main = "MHX" if company == "Micheroux" | company == "Bois de Micheroux"    //Micheroux
replace mine_id_main = "MIN" if company == "Minerie"   //Minerie
replace mine_id_main = "ESM" if company == "Nouvelle-Espérance"   //Nouvelle-Espérance
replace mine_id_main = "NVM" if company == "Nouvelle-Montagne"   //Nouvelle-Montagne
replace mine_id_main = "ONH" if company == "Onhons et Grandfontaine"   //Onhons et Grandfontaine
replace mine_id_main = "OUG" if company == "Ougrée" & company2 == "Charbonnage et hauts fourneaux" //Ougrée
   //Ougrée (Fabrique de fer)
replace mine_id_main = "OUG # MRH" if company == "Ougrée-Marihaye" //Ougrée-Marihaye (Société métallurgique)
replace mine_id_main = "OHY-LUR" if company == "Oulhaye-Lurtay" //Oulhaye-Lurtay
replace mine_id_main = "PBJ" if company == "Patience et Beaujonc"   //Patience et Beaujonc
replace mine_id_main = "PBC" if company == "Petite Bacnure"   //Petite Bacnure
   //Rocheux-Oneux
replace mine_id_main = "QJN" if company == "Quatre-Jean"   //Quatre-Jean
replace mine_id_main = "SBL" if company == "Sarts au Berleur"   //Sarts au Berleur
replace mine_id_main = "SCL" if company == "Sclessin" & year<=1885  //Sclessin
replace mine_id_main = "SBN" if company == "Six-Bonniers"   //Six-Bonniers
   //Syllié Pauwels, à Barse
replace mine_id_main = "TRE" if company == "Trembleur"   //Trembleur
replace mine_id_main = "VBT" if company == "Val-Benoit et Grand-Bac"   //Val-Benoit et Grand-Bac
replace mine_id_main = "VLN" if company == "Velaine"   //Velaine
replace mine_id_main = "VIM" if company == "Vieille-Montagne"   //Vieille-Montagne
replace mine_id_main = "WDR" if company == "Wandre"   //Wandre
replace mine_id_main = "WER" if company == "Werister" | company == "Wérister"   //Werister


save ./data/temp/membership.dta, replace
//Remark: "OUG # MRH" needs a reshape long before being used in the AdM database.
use ./data/temp/membership.dta, clear
drop if missing(mine_id_main)
keep mine_id_main year 
rename mine_id_main mine_id

**Solve "#" cases
preserve
gen marker = strpos(mine_id, "#")
keep if marker > 0
gen mine_id2 = trim(substr(mine_id,strpos(mine_id, "#")+1,.)) if marker>0
gen mine_id1 = trim(substr(mine_id,1,strpos(mine_id, "#")-1))  if marker>0
drop marker mine_id
reshape long mine_id , i(year) j(nr)
drop nr
tempfile appender
save `appender'
restore
append using `appender'
sort year
drop if strpos(mine_id, "#")>0

egen membership = group(mine_id)
rename mine_id memid
reshape wide memid, i(year) j(membership)
global countmemid = r(k) - 1
return list

rename year yr
tempfile membership
save `membership'


//Load union data into cleaned AdM data
use ./data/temp/data_adm_clean, clear			// the cleaning of the raw data and generation of this data set is included above.
 
 
merge m:1 yr using `membership'
list yr if _merge==2 //OK

//Identify members
global varlist "mine_id mine_subid_1 mine_subid_2 mine_subid_3 mine_subid_4 mine_subid_5"
gen membership = 0
qui {
foreach k of numlist 1/$countmemid {
foreach var in $varlist {
replace membership = 1 if `var'==memid`k' & !missing(memid`k')
}
}
}

drop if _merge==2
drop _merge
//Check
tab membership
rename membership dunion

bys yr: egen sunion = mean(dunion)
twoway connect sunion yr
drop sunion



** III. MUNICIPAL POSTAL CODES 
******************************************
 
preserve
import excel using ./data/postalcodes.xlsx, clear firstrow
gen n = _n
*reshape long town, i(n) j(id)
save ./data/temp/postalcodes, replace
restore

replace mine_community = "Oupée" if mine_community == "Oupée " 

merge m:1 mine_community using ./data/temp/postalcodes , nogen

*rename postal_code postal_code_original

* Sometimes changes in mine community within mines over time. take modal community 

sort yr
*gen postal_code = postal_code_original
*bys mine_id : egen postal_code_mode = mode(postal_code_original) , minmode		// minmode: picks biggest community (lowest number)
*replace postal_code = postal_code_mode if postal_code==.	// impute with mode if missing postal code

xtset mineid yr 
replace postal_code = L.postal_code if postal_code==.

*gen change = postal_code ~= postal_code_original 
 
tostring postal_code,gen(posts)
gen dq2 = substr(posts,1,2)
destring dq2, replace

sort yr
bys yr: egen avp = mean(p)
 
 
** IV. RAILROAD DATA
******************************************

preserve
import excel using "./data/commuting-transport/town_railconnections.xlsx", clear firstrow
tempfile rails
save `rails'
restore


merge m:1 town1 using `rails'
drop if _merge==2
drop _merge


** Connection to railroad and tramway network
gen drail = rail_start <= yr
replace drail = . if rail_start==.

bys yr: egen avdrail = mean(drail)
 
destring tram_start , force replace
gen dtram = tram_start<= yr
bys yr: egen avdtram = mean(dtram)

 
** V. CAPITAL STOCK CONSTRUCTION
******************************************************

replace expenses_spc_prem = 0 if expenses_spc_prem ==.
*replace winv = winv+expenses_spc_prem 	// choose not to include 'depenses premiers travaux' (expenses on new mine construction) as investment - rather sunk cost of entry.

bys yr: egen agwinv = mean(winv)
twoway connect agwinv yr
 
** Construct capital stock (perpetual inventory method)
 
xtset mineid yr 
 
* estimate capital depreciation rate

foreach var of varlist *_hp horses*{
replace `var' = 0 if `var'==.
}

gen horses = horses_under + horses_surf
gen hp = k_extraction_hp+k_excavation_hp+k_ventilation_hp+k_other_hp	// total horsepower
gen ho = horses_under+horses_surf
replace hp = . if yr>1899
replace ho = . if yr>1899

reg hp L.hp if L.winv==0 & yr<1900, r
gen dep1 = _b[L.hp]						// depreciation rate of 13%
gen sedep1 = _se[L.hp]
local N_dep1 = e(N)
local r2 = string(e(r2))
local r2_dep1 = substr("`r2'",1,4)
reg hp L.hp if L.winv>0 & yr<1900	, r
gen dep2 = _b[L.hp]
gen sedep2 = _se[L.hp]
local N_dep2 = e(N)
local r2 = string(e(r2))
local r2_dep2 = substr("`r2'",1,4)
reg ho L.ho if L.winv==0 & yr<1900	, r	// depreciation rate of 15%
gen sedep3 = _se[L.ho]
gen dep3 = _b[L.ho]
local r2 = string(e(r2))
local r2_dep3 = substr("`r2'",1,4)
local N_dep3 = e(N)
reg ho L.ho if L.winv>0 & yr<1900, r
gen dep4 = _b[L.ho]
local N_dep4 = e(N)
local r2 = string(e(r2))
local r2_dep4 = substr("`r2'",1,4)
gen sedep4 = _se[L.ho]

* store depreciation estimates in a table
forvalues n = 1/4 {
rename (dep`n' sedep`n') (dep   sedep )
estpost su dep 
est store dep`n'
replace dep  = sedep 
estpost su dep
est store dep`n'_se
drop dep sedep
}

gen dep = .
label var dep "$1-\delta$"
esttab dep1 dep1_se dep2 dep2_se   using ./output/tab/table_dep.tex, replace ///
mtitle("Est." "S.E." "Est." "S.E.") prehead(\textit{Panel A: Machine horsepower  } &   \multicolumn{2}{c}{Not invested} & \multicolumn{2}{c}{Invested} \\    )    ///
cells(mean(fmt(3))  ) label booktabs nonum collabels(none) gaps f   noobs ///
prefoot( &&&&\\  R-squared  &	\multicolumn{2}{c}{`r2_dep1'} &	\multicolumn{2}{c}{`r2_dep2'} \\  Observations &	\multicolumn{2}{c}{`N_dep1'} &	\multicolumn{2}{c}{`N_dep2'} \\ &&&&\\  )  posthead( \hline &&&\\   ) 

esttab dep3 dep3_se dep4 dep4_se   using ./output/tab/table_dep.tex, append ///
mtitle("Est." "S.E." "Est." "S.E.") prehead(\hline \textit{Panel B: Equine horsepower  } &  \multicolumn{2}{c}{Not invested} & \multicolumn{2}{c}{Invested} \\    )    ///
cells(mean(fmt(3))  ) label booktabs nonum collabels(none) gaps f   noobs ///
prefoot( &&&&\\  R-squared  &	\multicolumn{2}{c}{`r2_dep3'} &	\multicolumn{2}{c}{`r2_dep4'} \\  Observations &	\multicolumn{2}{c}{`N_dep3'} &	\multicolumn{2}{c}{`N_dep4'} \\ &&&&\\  )  posthead( \hline &&&\\   ) postfoot( \hline &&&\\   ) 
drop dep
  
  
  
gen dep = 0.87	// capital depreciation rate of 13%, in line with estimate from above.
  
xtset mineid yr
replace concession_ha = 0 if concession_ha==.

* initial capital stock

foreach var of varlist k_extraction_hp k_excavation_hp horses {
gen D`var' = D.`var'	
} 
reg winv Dk_extraction_hp Dk_excavation_hp  Dhorses   yr, r		// find price per horsepower for all machine types and horses
 
local r2 = string(e(r2)) 		 
local r2_estcap = substr("`r2'" ,1,4) 
local N_estcap= e(N)

 
gen pext = _b[Dk_extraction_hp]
gen pexc = _b[Dk_excavation_hp]
gen phorse = _b[Dhorses]
gen pt = _b[yr]

gen seext = _se[Dk_extraction_hp]
gen seexc = _se[Dk_excavation_hp]
gen sehorse = _se[Dhorses]
gen set = _se[yr]

* store these estimates in a table 

foreach var in "ext" "exc" "horse" "t" { 
gen `var' = p`var'
}
estpost su ext exc horse  
est store est_capital	
foreach var in "ext" "exc" "horse" "t" { 
replace `var' = se`var'
}
estpost su ext exc horse   
est store est_capital_se

label var ext "$\Delta$ H.P. of water extraction machines"
label var exc "$\Delta$ H.P. of hauling machines"
label var horse "$\Delta$ No. of horses"

esttab est_capital est_capital_se   using ./output/tab/table_estcap.tex, replace ///
 mtitle("Est." "S.E."   ) prehead( \\  &  \multicolumn{2}{c}{Capital investment}    \\       )    ///
cells(mean(fmt(3))   ) label booktabs nonum collabels(none) gaps f   noobs ///
prefoot(  &&\\ R-squared &  \multicolumn{2}{c}{`r2_estcap'} \\ Observations & \multicolumn{2}{c}{`N_estcap'}         )  posthead( \hline  &&\\   )  postfoot( && \\ \hline  ) 
 
 
drop ext exc horse t
	


gen k =  k_extraction_hp*pext+k_excavation_hp*pexc   +phorse*horses  if yr==1845 	//  capital stock in 1845
replace k = 0 if k==. & yr==1845  
	
forvalues t = 1846/1849 {
replace k = L.k*dep+L.winv*dep if yr==`t' & L.k~=. & k==. & L.winv~=.  			// transition rule for capital
replace k = 0 if  yr==`t'&  k==.   		// new entrants
}

replace k = L2.k*dep^2+L2.winv*dep^2+L2.winv*dep if yr==1851 & L2.k~=. & L2.winv~=.		// in the years with gaps, we need to adapt the transition rule (interpolate investment)
replace k = 0 if yr==1851  & k==.   

forvalues t = 1852/1885 {
replace k = L.k*dep+L.winv*dep if yr==`t'& L.k~=. & L.winv~=.
replace k = 0 if  yr==`t'&  k==.   
}

replace k = L5.k*dep^5 + L5.winv*dep + L5.winv*dep^2 + L5.winv*dep^3 + L5.winv*dep^4 + L5.winv*dep^5  if yr==1890 & L5.k~=.& L5.winv~=.
replace k = 0 if yr==1890 & k==.  

forvalues t = 1891/1899 {
replace k = L.k*dep+L.winv*dep if yr==`t'& L.k~=. & L.winv~=.
replace k = 0 if  yr==`t'&  k==.   
}

replace k = L3.k*dep^3  + L3.winv *dep + L3.winv *dep^2  +  L3.winv*dep^3 if yr==1902 & L3.k~=.& L3.winv~=.
replace k = 0 if yr==1902   & k==.  

forvalues t = 1903/1913 {
replace k = L.k*dep+L.winv*dep if yr==`t'& L.k~=. & L.winv~=.
replace k =  0 if yr==`t'& k==. 
}
 
// take logarithms of variables 
 
foreach var of varlist q k  emp p{
gen l`var' = log(`var')
}


** VI. OTHER VARIABLES CONSTRUCTION
******************************************************

** Create numeric area codes based on postal codes

drop dq2
tostring postal_code, gen(postal_codes) 

forvalues m = 1(1)4 {
gen dq`m' = substr(postal_codes,1,`m')
encode dq`m' , gen(dq`m'id)
}

** Recession dummy

bys yr: egen qyr = sum(q)
xtset mineid yr
gen gqyr = qyr/L.qyr-1
gen recq = gqyr<0			// recession = year-to-year drop in output	(do not use 2 consecutive quarters definition because no quarterly data)
replace recq = . if gqyr ==.
gen impp = degreve_imp_pq/degreve_imp_q

* Provincial labor force

preserve
import excel using ./data/employment_province.xlsx, clear firstrow
drop if unit=="Belgium"
encode unit, gen(provid)
rename unit province
reshape long empl, i(provid) j(yr)
rename empl emp_prov
save ./data/temp/employment_province, replace
restore

* Interpolate labor force in missing years (non-census years)

preserve
clear
set obs 69
gen yr = _n+1844
expand 3		// provinces in dataset

quietly bys yr:  gen dup = cond(_N==1,0,_n)
gen province = "Namur" if dup==1
replace province = "Liege" if dup==2
replace province = "Luxembourg" if dup==3

merge m:1 yr province using ./data/temp/employment_province
drop if _merge==2
drop _merge

ipolate emp_prov yr if province=="Namur", gen(emphat_na)
ipolate emp_prov yr if province=="Liege", gen(emphat_li)
ipolate emp_prov yr if province=="Luxembourg", gen(emphat_lu)
gen emphat = emphat_na if province=="Namur"
replace emphat = emphat_lu if province=="Luxembourg"
replace emphat = emphat_li if province=="Liege"
drop emphat_*

twoway connect emp_prov yr if province=="Namur" , msize(large) || connect emphat yr if province=="Namur"
twoway connect emp_prov yr if province=="Liege" , msize(large) || connect emphat yr if province=="Liege"
twoway connect emp_prov yr if province=="Luxembourg" , msize(large) || connect emphat yr if province=="Luxembourg"
drop emp_prov
rename emphat emp_prov

save ./data/temp/employment_province, replace
restore

tab dq1
gen province = "Namur" if dq1 == "5"
replace province ="Luxembourg" if dq1=="6"
replace province ="Liege" if dq1=="4"
merge m:1 yr province using ./data/temp/employment_province
drop if _merge==2

xtset mineid yr
replace emp_prov = L.emp_prov if yr==1911
replace emp_prov = L2.emp_prov if yr==1912
replace emp_prov = L3.emp_prov if yr==1913
 

** coal quality 
	replace q_fatty_t = q_fatty_tot if q_fatty_t==.
	replace q_fatty_t = q_fat_t if q_fatty_t==.
	replace q_fatty_t = 0 if q_fatty_t==. 
	gen sfatty = q_fatty_t/q
	
	bys yr: sum sfatty
	replace q_lean_t = q_leans_t if q_lean_t==.
	replace q_lean_t = q_leanl_t if q_lean_t==.
	replace q_lean_t = q_leans_tot_t if q_lean_t==.
	bys yr: sum q_lean_t
	gen slean = q_lean_t/q

	
foreach var of varlist q_leans_mn_t q_leans_glx_t { 
 replace `var'  = 0 if `var' ==. & yr==1852 
 replace `var'  = 0 if `var' ==. & yr==1862
 }

 replace slean = (q_leans_mn_t+q_leans_glx_t) / q if yr==1852 | yr==1862
 
	
** province-level labor

rename emp_prov nemp_prov	// counts of inhabitants
bys yr: egen avndays = mean(ndays)
gen emp_prov = nemp_prov*ndays 	// days worked in province
sum nemp_prov

    
bys yr: egen totk = sum(k)
bys yr: sum k
bys yr: egen totwinv= sum(winv)
bys yr: egen totwm= sum(wm)
twoway connect totk totwm    totwinv yr,  xline(1872 1899)

* Cobb-Douglas
*replace lk = log((k)*ndays +h*ndays +1 )

drop hp
gen hp = k_extraction_hp+k_excavation_hp
gen dext = k_extraction_hp>0
gen dexc = k_excavation_hp>0
gen dhp = hp>0
replace dext = . if k_extraction_hp==.
replace dexc = . if k_excavation_hp==.
replace dhp = . if hp==.

gen lu = log(u)
gen ls = log(s)
gen lus = log(u/(u+s))
 
 
gen dk = k_extraction_hp>0 | k_excavation_hp>0
replace dk = . if k_extraction_hp==. & k_excavation_hp==.


drop slean
replace q_lean_t = 0 if q_lean_t==.
gen slean = q_lean_t/q
	
replace lemp = log(emp )
replace lwm = log(wm )
replace lk = log(k)
replace ls = log(s)
replace lu = log(u)

// cleaning vein thickness data

gen thick1 = substr(avgvein_thick,1,4)
gen thick2 = substr(avgvein_thick,7,5)

replace thick1 = "0" if thick1==""
replace thick2 = "0" if thick2==""
gen avthick12 = (real(thick1)+real(thick2))/2
tostring avthick12, replace force

replace avgvein_thick = thick1 if real(avgvein_thick)==. & real(thick1)>0 & real(thick2)==0 
replace avgvein_thick = thick2 if real(avgvein_thick)==. & real(thick2)>0 & real(thick1)==0 
replace avgvein_thick = avthick12 if real(avgvein_thick)==. & real(thick2)>0 & real(thick1)>0 

destring avgvein_thick, replace
replace avgvein_thick = avgvein_thick/1000 if avgvein_thick>100

* Cartel variables:
 
xtset mineid yr 
 
decode mineid, gen(mineids)
replace mine_community = "Jemeppes-Meuse" if mineids=="GSL" 

preserve
import excel using "./data/cartel/cartel_new.xlsx", clear firstrow sheet("data_imputed")  
keep yr mineids dcar
save ./data/temp/cartel, replace
restore
 
drop _merge
merge m:1 mineids yr using ./data/temp/cartel
tab _merge if yr>=1898 & dq1=="4" & q~=.
drop _merge
replace dcar = 0 if dq1~= "4"

xtset mineid yr

bys mineids: egen evercar = max(dcar)
replace dcar = evercar if yr<1898

replace dcar = 0 if dq1== "4" & dcar==. & yr<1898

bys yr: egen ncar = sum(dcar)
*bys yr: sum  ncar
 

gen open = q~=.
bys yr: egen nf = sum(open)
gen ndis = nf-ncar
 


replace dunion = . if yr>1907 | yr<1869 


** VII. ELECTORAL DATA
***************************************

preserve
import excel using "./data/politics/town_politics_categories.xlsx", clear firstrow
tempfile politics
save `politics'
restore
  
  
merge m:1 town1 using `politics'
drop if _merge==2
drop _merge 
  

** VIII. POPULATION CENSUS DATA
***************************************


preserve
import excel using "./data/population/town_popcensuses.xlsx", clear firstrow
tempfile population
save `population'
restore

merge m:1 town1 yr using `population'
drop if _merge==2
drop _merge

* interpolate population data

gen pop_raw = pop_adult_male + pop_adult_female
label var pop_raw "Adult population below 55 years (raw data)"

ipolate pop_raw yr, epolate gen(pop)
label var pop "Adult population below 55 years (interpolated)"

preserve 
collapse(mean) pop, by(town1 yr)
bys yr: egen popyr = sum(pop)
twoway connect popyr yr
restore 



** IX. AGRICULTURAL DATA
***************************************

// Agricultural employment
////////////////////////////////////////////////////////////////////////////////

preserve 
// Data
import excel ".\data\agriculture\Buyst Forthcoming.xlsx", clear firstrow 
brow
tempfile buyst
save `buyst'

restore 


// Agricultural production
////////////////////////////////////////////////////////////////////////////////

preserve 
// Data
import excel ".\data\agriculture\Gadisseur 1979.xlsx", clear firstrow cellrange(a2)

keep year agriculturalproduct
tempfile gadisseur
save `gadisseur'
restore 
// Agricultural trade
////////////////////////////////////////////////////////////////////////////////

preserve 
// ISIC codes
import excel ".\data\agriculture\Degrève 1982 products.xlsx", clear firstrow sheet("SITC") cellrange(a2)
keep Code Product Aggr Subgroup Title Industry
tempfile isic
save `isic'
restore 

preserve 
// Data
import excel ".\data\agriculture\Degrève 1982 data final.xlsx", clear firstrow sheet("Volume A") cellrange(a2)
tempfile vola
save `vola'
import excel ".\data\agriculture\Degrève 1982 data final.xlsx", clear firstrow sheet("Volume A") cellrange(a2)
rename Quantity imp_q
rename Value imp_pq
rename F exp_q
rename G exp_pq
rename H trans_q
rename I trans_pq

// Merge 
merge m:1 Code using `isic'
drop if Aggr == 1
rename Year year
keep if year < 1914

// Create measures of agricultural invasion

** 1 - Key grains (inspired by O'Rourke 1997 JEH)

gen keygrain = ""
replace keygrain = "Rye" if Code == 25 //Avoine
replace keygrain = "Wheat" if Code == 26 //Froment
replace keygrain = "Oats" if Code == 27 //Seigle
replace keygrain = "Barley" if Code == 31 //Orge

** 2 - ISIC group
gen agricultural = (Industry == "Agriculture and livestock production")

bys year: egen tot_imp_pq1 = total(imp_pq) if agricultural == 1
bys year: egen tot_imp_pq2 = total(imp_pq) if !missing(keygrain)

bys year: egen tot_imp_q1 = total(imp_q) if agricultural == 1
bys year: egen tot_imp_q2 = total(imp_q) if !missing(keygrain)


*twoway (line tot_imp_pq1 year) (line tot_imp_pq2 year)


// Save data
////////////////////////////////////////////////////////////////////////////////

keep year tot_imp_pq1 tot_imp_pq2 tot_imp_q1 tot_imp_q2
rename tot_imp_pq1 imp_pq_keygrains
rename tot_imp_pq2 imp_pq_isic
rename tot_imp_q1 imp_q_keygrains
rename tot_imp_q2 imp_q_isic

duplicates drop
keep if !missing(imp_pq_keygrains) & !missing(imp_pq_isic)

merge 1:1 year using `gadisseur', nogen
merge 1:1 year using `buyst', nogen

* interpolate agricultural employment data for intermittent years 

ipolate agr_emp_tot year, gen(agr_emp_imputed)

// impute years for buyst 

save ./data/temp/data_agri, replace 

restore 

gen year = yr

merge m:1 year using ./data/temp/data_agri 
drop if _merge == 2 
drop _merge 


gen imp_p_keygrains = imp_pq_keygrains / imp_q_keygrains 
gen imp_p_isic = imp_pq_isic / imp_q_isic

** GENERATE FINAL DATASET THAT IS USED FOR ANALYSIS
****************************************************

save ./data/temp/data_pf, replace

use ./data/temp/data_pf, clear

 











  

 
 
